{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# TensorFlow Lattice estimators\n",
    "In this tutorial, we will cover the basics of TensorFlow Lattice estimators."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import libraries\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow as tf\n",
    "import tensorflow_lattice as tfl\n",
    "import tempfile\n",
    "from six.moves import urllib"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Synthetic dataset\n",
    "Here we create a synthetic dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "# The training set has a single feature, \"distance\". The raw values\n",
    "# (1.0 to 5.9) are scaled by 0.1, so they fall roughly in [0.1, 0.59].\n",
    "train_features = {\n",
    "    'distance': np.array([1.0, 1.3, 1.5, 2.0, 2.1, 3.0,\n",
    "                          4.0, 5.0, 1.3, 1.7, 2.5, 2.8,\n",
    "                          4.7, 4.2, 3.5, 4.75, 5.2,\n",
    "                          5.8, 5.9]) * 0.1,\n",
    "}\n",
    "# One label (user happiness) per distance value above, in the same order.\n",
    "train_labels = np.array([4.8, 4.9, 5.0, 5.0,\n",
    "                         4.8, 3.3, 2.5, 2.0,\n",
    "                         4.7, 4.6, 4.0, 3.2,\n",
    "                         2.12, 2.1, 2.5, 2.2,\n",
    "                         2.3, 2.34, 2.6])\n",
    "\n",
    "# Scatter plot of the training data, drawn through an explicit axes object.\n",
    "fig, ax = plt.subplots(1, 1)\n",
    "ax.scatter(train_features['distance'], train_labels)\n",
    "ax.set_xlabel('distance')\n",
    "# The trailing semicolon keeps the Text repr out of the cell output.\n",
    "ax.set_ylabel('user happiness');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def Plot(distance, label, estimator, xmin=0.0, xmax=10.0):\n",
    "    \"\"\"Plots an estimator's response curve on top of the labeled data.\n",
    "\n",
    "    Two things are drawn on one axes: a scatter plot of `distance` vs.\n",
    "    `label`, and the predictions of `estimator` for 100 evenly spaced\n",
    "    distances in [xmin, xmax].\n",
    "\n",
    "    Args:\n",
    "      distance: distance values of the data points to scatter.\n",
    "      label: labels of the data points, aligned with `distance`.\n",
    "      estimator: trained estimator; its predict() must yield dicts that\n",
    "        have a 'predictions' entry.\n",
    "      xmin: lower end of the distance range to predict on.\n",
    "      xmax: upper end of the distance range to predict on.\n",
    "    \"\"\"\n",
    "    test_features = {\n",
    "        'distance': np.linspace(xmin, xmax, num=100)\n",
    "    }\n",
    "    # Estimator accepts its input in the form of an input_fn (callable).\n",
    "    # numpy_input_fn creates an input function that feeds the 'distance'\n",
    "    # feature in its original order, one example per batch, for one pass.\n",
    "    test_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
    "        x=test_features,\n",
    "        batch_size=1,\n",
    "        num_epochs=1,\n",
    "        shuffle=False)\n",
    "    # With batch_size == 1, each yielded p['predictions'] holds a single\n",
    "    # element, which we fetch with p['predictions'][0].\n",
    "    predictions = [p['predictions'][0]\n",
    "                   for p in estimator.predict(input_fn=test_input_fn)]\n",
    "\n",
    "    # Plot the estimator's response and the (distance, label) scatter plot.\n",
    "    # Everything goes through `ax` instead of pyplot's implicit state, and\n",
    "    # the legend entries come from the artists' own labels.\n",
    "    fig, ax = plt.subplots(1, 1)\n",
    "    ax.plot(test_features['distance'], predictions, label='prediction')\n",
    "    ax.scatter(distance, label, label='data')\n",
    "    ax.set_xlabel('distance')\n",
    "    ax.set_ylabel('user happiness')\n",
    "    ax.legend()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DNN Estimator\n",
    "Now let us define feature columns and use a DNN regressor to fit a model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Specify feature.\n",
    "feature_columns = [\n",
    "    tf.feature_column.numeric_column('distance'),\n",
    "]\n",
    "# Define a neural network regressor.\n",
    "# The first hidden layer contains 30 hidden units, and the second\n",
    "# hidden layer contains 10 hidden units.\n",
    "dnn_estimator = tf.estimator.DNNRegressor(\n",
    "    feature_columns=feature_columns,\n",
    "    hidden_units=[30, 10],\n",
    "    optimizer=tf.train.GradientDescentOptimizer(\n",
    "      learning_rate=0.01,\n",
    "    ),\n",
    ")\n",
    "\n",
    "# Define training input function.\n",
    "# The mini-batch size is 10, and we iterate over the dataset\n",
    "# 1000 times.\n",
    "train_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
    "    x=train_features,\n",
    "    y=train_labels,\n",
    "    batch_size=10,\n",
    "    num_epochs=1000,\n",
    "    shuffle=False)\n",
    "\n",
    "tf.logging.set_verbosity(tf.logging.ERROR)\n",
    "# Train this estimator\n",
    "dnn_estimator.train(input_fn=train_input_fn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Response in [0.0, 1.0] range\n",
    "Plot(train_features['distance'], train_labels, dnn_estimator, 0.0, 1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Now let's increase the prediction range to [0.0, 3.0].\n",
    "# Note: on most machines, the prediction goes up.\n",
    "# However, DNN training does not have a unique solution, so you may\n",
    "# not see this phenomenon.\n",
    "Plot(train_features['distance'], train_labels, dnn_estimator, 0.0, 3.0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# TensorFlow Lattice calibrated linear model\n",
    "Let's use a calibrated linear model to fit the data.\n",
    "Since we only have one feature, there's no reason to use a lattice."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TensorFlow Lattice needs feature names to specify\n",
    "# per-feature parameters.\n",
    "feature_names = [fc.name for fc in feature_columns]\n",
    "num_keypoints = 5\n",
    "\n",
    "hparams = tfl.CalibratedLinearHParams(\n",
    "    feature_names=feature_names,\n",
    "    learning_rate=0.1,\n",
    "    num_keypoints=num_keypoints)\n",
    "\n",
    "# Input keypoint initializers.\n",
    "# keypoints_init_fns is a dict of (feature_name, callable initializer).\n",
    "keypoints_init_fns = {\n",
    "    'distance': lambda: tfl.uniform_keypoints_for_signal(num_keypoints,\n",
    "                                                         input_min=0.0,\n",
    "                                                         input_max=0.7,\n",
    "                                                         output_min=-1.0,\n",
    "                                                         output_max=1.0)}\n",
    "\n",
    "non_monotnic_estimator = tfl.calibrated_linear_regressor(\n",
    "    feature_columns=feature_columns,\n",
    "    keypoints_initializers_fn=keypoints_init_fns,\n",
    "    hparams=hparams)\n",
    "\n",
    "non_monotnic_estimator.train(input_fn=train_input_fn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The prediction goes up!\n",
    "Plot(train_features['distance'], train_labels, non_monotnic_estimator, 0.0, 1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Declare distance as a monotonically decreasing input.\n",
    "# We build a separate hparams object with the same settings instead of\n",
    "# mutating `hparams` in place, so the hparams used for the non-monotonic\n",
    "# model are not altered after the fact.\n",
    "monotonic_hparams = tfl.CalibratedLinearHParams(\n",
    "    feature_names=feature_names,\n",
    "    learning_rate=0.1,\n",
    "    num_keypoints=num_keypoints)\n",
    "monotonic_hparams.set_feature_param('distance', 'monotonicity', -1)\n",
    "\n",
    "monotonic_estimator = tfl.calibrated_linear_regressor(\n",
    "    feature_columns=feature_columns,\n",
    "    keypoints_initializers_fn=keypoints_init_fns,\n",
    "    hparams=monotonic_hparams)\n",
    "\n",
    "monotonic_estimator.train(input_fn=train_input_fn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Now it's decreasing.\n",
    "Plot(train_features['distance'], train_labels, monotonic_estimator, 0.0, 1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Even if the prediction range becomes larger, the prediction never goes up!\n",
    "Plot(train_features['distance'], train_labels, monotonic_estimator, 0.0, 3.0)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tf-lattice",
   "language": "python",
   "name": "tf-lattice"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
